
library(dplyr)
library(plyr)
library(ggplot2)
library(xtable)
library('stargazer')
library('miceadds')
library(readstata13)
library(mediation)
library(mvtnorm)
library(purrr)

#######################
### Setting up data ###
#######################

#read in data pruned data 
rueda <- read.dta13("data_imputed_pruned.dta")

#variable adjustment 
rueda <- subset(rueda, select = -c(`_mi_miss`)) 
rueda <- rueda[!is.na(rueda$incdist),]
rueda$incdist <- rueda$incdist/10000  
rueda$Rgdp <- rueda$Rgdp/10000

#according to Rueda 2018, can dichotomise rincd variable (p.230)

rueda$rincd_d <- ifelse(rueda$rincd > 3, 1, 0)
rueda$fear_d <- ifelse(rueda$fear > 2, 1, 0)  

########################
### motivating model ###
########################
# -> showing that rich more favourable towards redistribution in areas with high inequality 


income.range <- seq(min(rueda$incdist), max(rueda$incdist), 0.5)

multiple <- lapply(1:5, FUN = function(dat){
  
  pref.fit <- glm(rincd_d ~ Rgini*incdist + age + female + eduyrs + transue + 
                    transnlf + hhmmb + lowsup + smempl + whcol + blcol + noclass + 
                    skillshg + skillssp + Rforeign + Ruerate + Rtech + Rgdp,
                  family = binomial(link="probit"),
                  data = na.omit(rueda[rueda$`_mi_m` == dat,]))
  
  sim.beta <- rmvnorm(10^5,
                      mean = coef(pref.fit),
                      sigma = vcov(pref.fit))
  
  pref.vec.high <- apply(model.matrix(pref.fit), MARGIN = 2, median)
  pref.vec.low <- apply(model.matrix(pref.fit), MARGIN = 2, median)
  
  pref.vec.low['Rgini'] <- 0.29
  pref.vec.high['Rgini'] <- 0.38
  pref.vec.low['Rgini'] <- 0.29
  
  #loop over income range
  
  redist <- lapply(income.range, FUN=function(i){
    pref.vec.high['incdist'] <- i
    pref.vec.low['incdist'] <- i
    pref.vec.high['Rgini:incdist'] <- i*0.38
    pref.vec.low['Rgini:incdist'] <- i*0.29
    pr.high <- pnorm(sim.beta %*% pref.vec.high)
    pr.low <- pnorm(sim.beta %*% pref.vec.low)
    o <- sapply(list(pr.high, pr.low),
                FUN=function(j){c(mean(j), sd(j), quantile(j, c(0.025, 0.5, 0.975)))})
    o <- data.frame(t(o))
    names(o) <- c('mean', 'SE', 'p025','median', 'p975')
    o$type <- c('High gini', 'Low gini')
    o$income <- i
    o$mi <- dat
    return(o)
  })
  
  redist.df <- as.data.frame(do.call(rbind, redist))
  
  return(redist.df)
})

multiple <- as.data.frame(do.call(rbind, multiple))
pi <- data.frame(cbind(multiple[multiple$mi == 1, ]$mean, multiple[multiple$mi == 2, ]$mean, 
                       multiple[multiple$mi == 3, ]$mean, multiple[multiple$mi == 4, ]$mean,
                       multiple[multiple$mi == 5, ]$mean ))
point_estimate <- apply(pi, MARGIN = 1, FUN = mean)

#create standard errors 

se <- data.frame(multiple[multiple$mi == 1, ]$SE, multiple[multiple$mi == 2, ]$SE, 
                 multiple[multiple$mi == 3, ]$SE, multiple[multiple$mi == 4, ]$SE,
                 multiple[multiple$mi == 5, ]$SE )

standard_errors <- sapply(1:nrow(pi), FUN = function(i){
  err <- mean(as.numeric(se[i, ])^2) + 
            (sum((as.numeric(pi[i, ]) - mean(as.numeric(pi[i, ])))^2)/(ncol(pi) - 1))*(1 + 1/ncol(pi))
  return(sqrt(err))
})

plot_df1 <- data.frame(cbind(subset(data.frame(standard_errors, point_estimate, multiple$type), multiple.type == 'High gini'), income.range))
plot_df2 <- data.frame(cbind(subset(data.frame(standard_errors, point_estimate, multiple$type), multiple.type == 'Low gini'), income.range))
plot_df <- as.data.frame(rbind(plot_df1, plot_df2))
# plot
#quantile(rueda$incdist, c(0, .95)) get x axis limits 
pref.plot <- ggplot(plot_df) +
  geom_line(aes(x=income.range,y=point_estimate,group=multiple.type,col=multiple.type)) +
  geom_ribbon(aes(x=income.range,ymin= point_estimate - 1.64*standard_errors,ymax= point_estimate + 1.64 *standard_errors, 
                  group=multiple.type,fill=multiple.type), alpha = 0.25) +
  xlab('Income Distance') + ylab('Probability of Supporting Redistribution') + theme_classic() +
  scale_color_grey(guide = F) + scale_fill_grey() + labs(fill = "Inequality") + xlim(-5, 5) + ylim(0.5, 0.9)
#geom_text(aes(x = 32, y = 0.50, label = "High gini"))  + 
#geom_text(aes(x = 20, y = 0.1, label = "Low gini"), col = 'Gray47')
pdf("motivating_plot_new.pdf")
pref.plot
dev.off()

#accompanying table 
# need to loop over imputed data set
original_table <- lapply(1:5, FUN = function(dat){
  pref.fit <- glm(rincd_d ~ Rgini*incdist + age + female + eduyrs + transue + 
                    transnlf + hhmmb + lowsup + smempl + whcol + blcol + noclass + 
                    skillshg + skillssp + Rforeign + Ruerate + Rtech + Rgdp,
                  family = binomial(link="probit"),
                  data = na.omit(rueda[rueda$`_mi_m` == dat,]))
  df <- data.frame(do.call(cbind, list(pref.fit$coefficients, summary(pref.fit)$coefficients[, 2])))
  names(df) <- c('coef', 'se')                
  return(df)
})

#define for stargazer
pref.fit <- glm(rincd_d ~ Rgini*incdist + age + female + eduyrs + transue + 
                  transnlf + hhmmb + lowsup + smempl + whcol + blcol + noclass + 
                  skillshg + skillssp + Rforeign + Ruerate + Rtech + Rgdp,
                family = binomial(link="probit"),
                data = na.omit(rueda[rueda$`_mi_m` == 2,]))

#input list of df to calculate correct point estimate 
point_fn <- function(df_list){
  coef_df <- do.call(cbind, lapply(df_list, '[[', 'coef'))
  coef <- apply(coef_df, MARGIN = 1, FUN = mean)
  return(coef)
}
#calculating correct se's; again input a list o df's 
se_fn <- function(df_list){
  se_df <- do.call(cbind, lapply(df_list, '[[', 'se'))
  se_1 <- apply(se_df^2 , MARGIN = 1, FUN = mean)
  se_2 <- (sum((se_df - mean(se_df))^2)/(ncol(se_df)))*(1 + 1/ncol(se_df))
  return(sqrt(se_1 + se_2))
}

coefs <- point_fn(original_table)
ses <- se_fn(original_table)
# these are the correct se's/ coefs for the model above ^ 
names(coefs) <- names(coef(pref.fit))
names(ses) <- names(coef(pref.fit))

stargazer(pref.fit, type = "latex", title = "Redistribution Probit Regression Results", coef = list(coefs), se = list(ses))


#############################
#### FEAR OF CRIME MODEL ####
#############################
Rgini.range <- seq(min(rueda$Rgini), max(rueda$Rgini), 0.01)

fear.df <- lapply(1:5, FUN = function(dat){
  
  mediator.fit <- glm(fear_d ~ Rgini + incdist + age + female + eduyrs + transue + transnlf + 
                        hhmmb +   lowsup + smempl + whcol + blcol + noclass + victim  + 
                        Rforeign + Rgdp, family = binomial(link="probit"),
                      data = na.omit(rueda[rueda$`_mi_m` == dat,]))
  
  #simulate predicted probabilities over Rgini range 
  sim.beta <- rmvnorm(10^5,
                      mean = coef(mediator.fit),
                      sigma = vcov(mediator.fit))
  
  fear.vec <- apply(model.matrix(mediator.fit), MARGIN = 2, median)
  #loop over income range
  
  gini <- lapply(Rgini.range, FUN=function(i){
    fear.vec['Rgini'] <- i
    pr.fear <- pnorm(sim.beta %*% fear.vec)
    o <- sapply(list(pr.fear),
                FUN=function(j){c(mean(j), sd(j), quantile(j, c(0.025, 0.5, 0.975)))})
    o <- data.frame(t(o))
    names(o) <- c('mean', 'se', 'p025','median', 'p975')
    o$Rgini <- i
    o$mi <- dat
    return(o)
  })
  
  gini.df <- as.data.frame(do.call(rbind, gini))
  return(gini.df)
})

fear.df <- as.data.frame(do.call(rbind, fear.df))
pi <- data.frame(cbind(fear.df[fear.df$mi == 1, ]$mean, fear.df[fear.df$mi == 2, ]$mean, 
                       fear.df[fear.df$mi == 3, ]$mean, fear.df[fear.df$mi == 4, ]$mean,
                       fear.df[fear.df$mi == 5, ]$mean))
point_estimate <- apply(pi, MARGIN = 1, FUN = mean)

#create standard errors 

se <- data.frame(fear.df[fear.df$mi == 1, ]$se, fear.df[fear.df$mi == 2, ]$se, 
                 fear.df[fear.df$mi == 3, ]$se, fear.df[fear.df$mi == 4, ]$se,
                 fear.df[fear.df$mi == 5, ]$se)

standard_errors <- sapply(1:nrow(pi), FUN = function(i){
  err <- mean(as.numeric(se[i, ])^2) + 
    (sum((as.numeric(pi[i, ]) - mean(as.numeric(pi[i, ])))^2)/(ncol(pi) - 1))*(1 + 1/ncol(pi))
  return(sqrt(err))
})


plot_df1 <- data.frame(cbind(standard_errors, point_estimate, Rgini.range))


fear.plot <- ggplot(plot_df1) +
  geom_line(aes(x=Rgini.range,y=point_estimate)) +
  geom_ribbon(aes(x=Rgini.range,ymin= point_estimate - 1.96*standard_errors,ymax= point_estimate 
                  + 1.96*standard_errors), alpha = 0.25) +
  xlab('Regional Inequality (Rgini)') + ylab('Probability') + theme_classic() + 
  scale_color_grey(guide = F) + scale_fill_grey(guide = F) + labs(title = "Probability of Fearing Crime by Regional Inequality") +
  theme(plot.title = element_text(hjust = 0.5))
pdf("fear_plot_new.pdf")
fear.plot
dev.off()

#making table with correct pi's and se's using functions above

#loop over imputed data set
fear_table <- lapply(1:5, FUN = function(dat){
  mediator.fit <- glm(fear_d ~ Rgini + incdist + age + female + eduyrs + transue + transnlf + 
                        hhmmb +   lowsup + smempl + whcol + blcol + noclass + victim  + 
                        Rforeign + Rgdp, family = binomial(link="probit"),
                      data = na.omit(rueda[rueda$`_mi_m` == dat,]))
  df <- data.frame(do.call(cbind, list(mediator.fit$coefficients, summary(mediator.fit)$coefficients[, 2])))
  names(df) <- c('coef', 'se')                
  return(df)
})

# define for stargazer
mediator.fit <- glm(fear_d ~ Rgini + incdist + age + female + eduyrs + transue + transnlf + 
                      hhmmb +   lowsup + smempl + whcol + blcol + noclass + victim  + 
                      Rforeign + Rgdp, family = binomial(link="probit"),
                    data = na.omit(rueda[rueda$`_mi_m` == 2,]))

#apply functions for correct SEs for fear model
coefs_fear <- round(point_fn(fear_table), 3)
ses_fear <- round(se_fn(fear_table), 3)

#generate named numeric vectors for stargazer output 
names(coefs_fear) <- names(coef(mediator.fit))
names(ses_fear) <- names(coef(mediator.fit))

#replace original coefs/ses from probit model with those from multiple imputation 
#stargazer(mediator.fit, type = "latex", title = "Probit Regression Results: Fear", coef = list(coefs_fear), se = list(ses_fear))
## ^ included with second-stage results 

########################################################
## REDISTRIBUTION PREFERENCES BY FEAR FOR REPLICATION ## 
########################################################
# pull predicted probabilites and get predicted values from fear model to fear to second-stage ordered probit 
# pull from model on original data (not imputed, because can only get 5 sets when running on imputed)
mediator.fit.original <- glm(fear_d ~ Rgini + incdist + age + female + eduyrs + transue + transnlf + 
                               hhmmb +   lowsup + smempl + whcol + blcol + noclass + victim  + 
                               Rforeign + Rgdp, family = binomial(link="probit"),
                             data = na.omit(rueda[rueda$`_mi_m` == 0,]))
mod.fear.orig <- predict(mediator.fit.original, type = "response")

# now pull predicted values on imputed datasets
pred.fear <- rep(list(NA), 5)
model.fear <- lapply(1:5, FUN = function(dat){
  mediator.fit <- glm(fear_d ~ Rgini + incdist + age + female + eduyrs + transue + transnlf + 
                        hhmmb +   lowsup + smempl + whcol + blcol + noclass + victim  + 
                        Rforeign + Rgdp, family = binomial(link="probit"),
                      data = na.omit(rueda[rueda$`_mi_m` == dat,]))
  
  pred.fear[[dat]] <- predict(mediator.fit, type = "response")
})
model.fear <- as.data.frame(do.call(cbind, model.fear))
model.fear.new <- c(mod.fear.orig, model.fear$V1, model.fear$V2, model.fear$V3, model.fear$V4, model.fear$V5)
hist(model.fear.new)

## given randomness of correlation, using cutoff point as 75th percentile which is the ratio of yes/no in the true fear data
table(rueda.new$fear_d)
## find cutoff point 
quantile(model.fear.new, 0.75) #0.3
fear.yn <- ifelse(model.fear.new > 0.3, 1, 0)

## appears rate is about random/no correlation between fear data and model 
#have to na.omit on whole dataset pre-second stage in order to add modeled covariate 
rueda.new <- na.omit(rueda)
rueda.new$modeled.fear <- fear.yn
cor(rueda.new$fear_d, rueda.new$modeled.fear)

simple_roc <- function(labels, scores){
  labels <- labels[order(scores, decreasing=TRUE)]
  data.frame(TPR=cumsum(labels)/sum(labels), FPR=cumsum(!labels)/sum(!labels), labels)
}
glm_simple_roc <- simple_roc(rueda.new$fear_d == T, rueda.new$modeled.fear)
pdf("roc_plot.pdf")
roc <- ggplot() + geom_point(aes(x = glm_simple_roc$FPR, y = glm_simple_roc$TPR), alpha = 0.5, size = 0.1) + 
  theme(plot.title = element_text(hjust = 0.5)) + theme_bw() +
  labs(x = "False Positive Rate", y = "True Positive Rate", title = "ROC Curve for Modeling Fear of Crime") 

pdf("roc_plot.pdf")
roc
dev.off()



#(replace rueda with rueda.new)
multiple <- lapply(1:5, FUN = function(dat){
  
  two.stage <- glm(rincd_d ~ modeled.fear*incdist + Rgini*incdist + modeled.fear*Rgini + age + female + eduyrs + transue + 
                     transnlf + hhmmb + lowsup + smempl + whcol + blcol + noclass + 
                     skillshg + skillssp + Rforeign + Ruerate + Rtech + Rgdp,
                   family = binomial(link="probit"),
                   data = na.omit(rueda.new[rueda.new$`_mi_m` == dat,]))
  
  sim.beta <- rmvnorm(10^5,
                      mean = coef(two.stage),
                      sigma = vcov(two.stage))
  
  #loop over income range
  sim.vec.high <- apply(model.matrix(two.stage), MARGIN = 2, median)
  sim.vec.low <- sim.vec.high
  sim.vec.high['Rgini'] <- 0.38
  sim.vec.low['Rgini'] <- 0.29
  sim.vec.high.f <- sim.vec.high
  sim.vec.high.f['modeled.fear'] <- 1
  sim.vec.high.f['modeled.fear:Rgini'] <- 0.38
  sim.vec.low.f <- sim.vec.low
  sim.vec.low.f['modeled.fear'] <- 1
  sim.vec.low.f['modeled.fear:Rgini'] <- 0.29
  
  fear.inc <- lapply(income.range, FUN = function(i){
    sim.vec.high['incdist'] <- i
    sim.vec.high['incdist:Rgini'] <- 0.38*i
    sim.vec.low['incdist'] <- i
    sim.vec.low['incdist:Rgini'] <- 0.29*i
    sim.vec.high.f['incdist'] <- i
    sim.vec.high.f['incdist:Rgini'] <- 0.38*i
    sim.vec.high.f['modeled.fear:incdist'] <- i
    sim.vec.low.f['incdist'] <- i
    sim.vec.low.f['incdist:Rgini'] <- 0.29*i
    sim.vec.low.f['modeled.fear:incdist'] <- i
    pr.high <- pnorm(sim.beta%*%sim.vec.high)
    pr.low <- pnorm(sim.beta%*%sim.vec.low)
    pr.high.f <- pnorm(sim.beta%*%sim.vec.high.f)
    pr.low.f <- pnorm(sim.beta%*%sim.vec.low.f)
    o <- sapply(list(pr.high, pr.low, pr.high.f, pr.low.f),
                FUN=function(j){c(mean(j), sd(j), quantile(j, c(0.025, 0.5, 0.975)))})
    o <- data.frame(t(o))
    names(o) <- c('mean', 'se', 'p025','median', 'p975')
    o$type <- c('High gini', 'Low gini', 'High gini (fear)', 'Low gini (fear)')
    o$high <- c(1, 0, 1, 0)
    o$fear <- c(0, 0, 1, 1)
    o$income <- i
    o$mi <- dat
    return(o) })
  
  fear.df <- as.data.frame(do.call(rbind, fear.inc))
  return(fear.df)
})

fear.inc <- as.data.frame(do.call(rbind, multiple))
pi <- data.frame(cbind(fear.inc[fear.inc$mi == 1, ]$mean, fear.inc[fear.inc$mi == 2, ]$mean, 
                       fear.inc[fear.inc$mi == 3, ]$mean, fear.inc[fear.inc$mi == 4, ]$mean,
                       fear.inc[fear.inc$mi == 5, ]$mean))

point_estimate <- apply(pi, MARGIN = 1, FUN = mean)

#create standard errors 
se <- data.frame(fear.inc[fear.inc$mi == 1, ]$se, fear.inc[fear.inc$mi == 2, ]$se, 
                 fear.inc[fear.inc$mi == 3, ]$se, fear.inc[fear.inc$mi == 4, ]$se,
                 fear.inc[fear.inc$mi == 5, ]$se)

standard_errors <- sapply(1:nrow(pi), FUN = function(i){
  err <- mean(as.numeric(se[i, ])^2) + 
    (sum((as.numeric(pi[i, ]) - mean(as.numeric(pi[i, ])))^2)/(ncol(pi) - 1))*(1 + 1/ncol(pi))
  return(sqrt(err))
})

plot_df1 <- data.frame(cbind(subset(data.frame(standard_errors, point_estimate, fear.inc$type, fear.inc$fear, fear.inc$high),
                                    fear.inc.fear == '1' & fear.inc.high == 1), income.range))
plot_df2 <- data.frame(cbind(subset(data.frame(standard_errors, point_estimate, fear.inc$type, fear.inc$fear, fear.inc$high),
                                    fear.inc.fear == '1' & fear.inc.high == 0), income.range))
plot_df3 <- data.frame(cbind(subset(data.frame(standard_errors, point_estimate, fear.inc$type, fear.inc$fear, fear.inc$high),
                                    fear.inc.fear == 0 & fear.inc.high == 1), income.range))
plot_df4 <- data.frame(cbind(subset(data.frame(standard_errors, point_estimate, fear.inc$type, fear.inc$fear, fear.inc$high),
                                    fear.inc.fear == 0 & fear.inc.high == 0), income.range))
plot_df <- as.data.frame(rbind(plot_df1, plot_df2, plot_df3, plot_df4))

plot.varyfear.low <- ggplot(subset(plot_df, fear.inc.high == 0 )) +
  geom_line(aes(x=income.range,y=point_estimate,group=fear.inc.type,col=fear.inc.type)) +
  geom_ribbon(aes(x=income.range,ymin=point_estimate - 1.96*standard_errors,ymax=point_estimate + 
                    1.96*standard_errors, group=fear.inc.type,fill=fear.inc.type), alpha = 0.6) +
  xlab('Income') + ylab('Probability of Supporting Redistribution') + theme_classic() + 
  scale_color_grey(guide = F) + scale_fill_grey(labels = c("No Fear", "Fear")) + labs(fill = "", title = "Low Regional Inequality (Two-Stage Probit)") + 
  xlim(-5,5) + ylim(0.5, 0.9) + theme(plot.title = element_text(hjust = 0.5))
#geom_text(aes(x = 10, y = 0.2, label = "Fear of crime =  0"))  + 
#geom_text(aes(x = 25, y = 0.64, label = "Fear of crime =  1"), col = 'Gray47') 
pdf("low_gini_fear_twostage.pdf")
plot.varyfear.low
dev.off()

plot.varyfear.high <- ggplot(subset(plot_df, fear.inc.high == 1 )) +
  geom_line(aes(x=income.range,y=point_estimate,group=fear.inc.type,col=fear.inc.type), show.legend = "T") +
  geom_ribbon(aes(x=income.range,ymin=point_estimate - 1.96*standard_errors,ymax=point_estimate + 
                    1.96*standard_errors, group=fear.inc.type,fill=fear.inc.type), alpha = 0.6) +
  xlab('Income') + ylab('Probability of Supporting Redistribution') + theme_classic() + xlim(-5,5) + ylim(0.5, 0.9) +
  scale_color_grey(guide = F) + scale_fill_grey(labels = c("No Fear", "Fear")) + 
  labs(fill = "", title = "High Regional Inequality (Two-Stage Probit)") + theme(plot.title = element_text(hjust = 0.5))
#geom_text(aes(x = 20, y = 0.23, label = "Fear of crime =  0"))  + 
#geom_text(aes(x = 25, y = 0.64, label = "Fear of crime =  1"), col = 'Gray47') 
pdf("high_gini_fear_twostage.pdf")
plot.varyfear.high
dev.off()

# correct se's and pi's for this model 

fear.inc.tab <- lapply(1:5, FUN = function(dat){
  fit.twostage <- glm(rincd_d ~ modeled.fear*incdist + Rgini*incdist + modeled.fear*Rgini + age + female + eduyrs + transue + 
                        transnlf + hhmmb + lowsup + smempl + whcol + blcol + noclass + 
                        skillshg + skillssp + Rforeign + Ruerate + Rtech + Rgdp,
                      family = binomial(link="probit"),
                      data = na.omit(rueda.new[rueda.new$`_mi_m` == dat,]))
  df <- data.frame(do.call(cbind, list(fit.twostage$coefficients, summary(fit.twostage)$coefficients[, 2])))
  names(df) <- c('coef', 'se')                
  return(df)
})

fit.twostage <- glm(rincd_d ~ modeled.fear*incdist + Rgini*incdist + modeled.fear*Rgini + age + female + eduyrs + transue + 
                      transnlf + hhmmb + lowsup + smempl + whcol + blcol + noclass + 
                      skillshg + skillssp + Rforeign + Ruerate + Rtech + Rgdp,
                    family = binomial(link="probit"),
                    data = na.omit(rueda.new[rueda.new$`_mi_m` == 2,]))

#apply functions

coefs_f_inc.2 <- point_fn(fear.inc.tab)
ses_f_inc.2 <- se_fn(fear.inc.tab)
names(coefs_f_inc.2) <- names(coef(fit.twostage))
names(ses_f_inc.2) <- names(coef(fit.twostage))
stargazer(mediator.fit, fit.twostage, type = "latex", title = "Two-Stage Probit Regression Results", 
          coef = list(coefs_fear, coefs_f_inc.2), se = list(ses_fear, ses_f_inc.2))


################################################################
### REDISTRIBUTION PREFERENCES BY FEAR FOR MEDIATION ARGUMENT ## 
################################################################

multiple <- lapply(1:5, FUN = function(dat){
  
  outcome.fit <- glm(rincd_d ~ fear_d*incdist + Rgini*incdist + fear_d*Rgini + age + female + eduyrs + transue + 
                       transnlf + hhmmb + lowsup + smempl + whcol + blcol + noclass + 
                       skillshg + skillssp + Rforeign + Ruerate + Rtech + Rgdp,
                     family = binomial(link="probit"),
                     data = na.omit(rueda[rueda$`_mi_m` == dat,]))
  
  sim.beta <- rmvnorm(10^5,
                      mean = coef(outcome.fit),
                      sigma = vcov(outcome.fit))
  
  #loop over income range
  sim.vec.high <- apply(model.matrix(outcome.fit), MARGIN = 2, median)
  sim.vec.low <- sim.vec.high
  sim.vec.high['Rgini'] <- 0.38
  sim.vec.low['Rgini'] <- 0.29
  sim.vec.high.f <- sim.vec.high
  sim.vec.high.f['fear_d'] <- 1
  sim.vec.high.f['fear_d:Rgini'] <- 0.38
  sim.vec.low.f <- sim.vec.low
  sim.vec.low.f['fear_d'] <- 1
  sim.vec.low.f['fear_d:Rgini'] <- 0.29
  
  fear.inc <- lapply(income.range, FUN = function(i){
    sim.vec.high['incdist'] <- i
    sim.vec.high['incdist:Rgini'] <- 0.38*i
    sim.vec.low['incdist'] <- i
    sim.vec.low['incdist:Rgini'] <- 0.29*i
    sim.vec.high.f['incdist'] <- i
    sim.vec.high.f['incdist:Rgini'] <- 0.38*i
    sim.vec.high.f['fear_d:incdist'] <- i
    sim.vec.low.f['incdist'] <- i
    sim.vec.low.f['incdist:Rgini'] <- 0.29*i
    sim.vec.low.f['fear_d:incdist'] <- i
    pr.high <- pnorm(sim.beta%*%sim.vec.high)
    pr.low <- pnorm(sim.beta%*%sim.vec.low)
    pr.high.f <- pnorm(sim.beta%*%sim.vec.high.f)
    pr.low.f <- pnorm(sim.beta%*%sim.vec.low.f)
    o <- sapply(list(pr.high, pr.low, pr.high.f, pr.low.f),
                FUN=function(j){c(mean(j), sd(j), quantile(j, c(0.025, 0.5, 0.975)))})
    o <- data.frame(t(o))
    names(o) <- c('mean', 'se', 'p025','median', 'p975')
    o$type <- c('High gini', 'Low gini', 'High gini (fear)', 'Low gini (fear)')
    o$high <- c(1, 0, 1, 0)
    o$fear <- c(0, 0, 1, 1)
    o$income <- i
    o$mi <- dat
    return(o) })
  
  fear.df <- as.data.frame(do.call(rbind, fear.inc))
  return(fear.df)
})

fear.inc <- as.data.frame(do.call(rbind, multiple))
pi <- data.frame(cbind(fear.inc[fear.inc$mi == 1, ]$mean, fear.inc[fear.inc$mi == 2, ]$mean, 
                       fear.inc[fear.inc$mi == 3, ]$mean, fear.inc[fear.inc$mi == 4, ]$mean,
                       fear.inc[fear.inc$mi == 5, ]$mean))

point_estimate <- apply(pi, MARGIN = 1, FUN = mean)

#create standard errors 
se <- data.frame(fear.inc[fear.inc$mi == 1, ]$se, fear.inc[fear.inc$mi == 2, ]$se, 
                 fear.inc[fear.inc$mi == 3, ]$se, fear.inc[fear.inc$mi == 4, ]$se,
                 fear.inc[fear.inc$mi == 5, ]$se)

standard_errors <- sapply(1:nrow(pi), FUN = function(i){
  err <- mean(as.numeric(se[i, ])^2) + 
    (sum((as.numeric(pi[i, ]) - mean(as.numeric(pi[i, ])))^2)/(ncol(pi) - 1))*(1 + 1/ncol(pi))
  return(sqrt(err))
})

plot_df1 <- data.frame(cbind(subset(data.frame(standard_errors, point_estimate, fear.inc$type, fear.inc$fear, fear.inc$high),
                                    fear.inc.fear == '1' & fear.inc.high == 1), income.range))
plot_df2 <- data.frame(cbind(subset(data.frame(standard_errors, point_estimate, fear.inc$type, fear.inc$fear, fear.inc$high),
                                    fear.inc.fear == '1' & fear.inc.high == 0), income.range))
plot_df3 <- data.frame(cbind(subset(data.frame(standard_errors, point_estimate, fear.inc$type, fear.inc$fear, fear.inc$high),
                                    fear.inc.fear == 0 & fear.inc.high == 1), income.range))
plot_df4 <- data.frame(cbind(subset(data.frame(standard_errors, point_estimate, fear.inc$type, fear.inc$fear, fear.inc$high),
                                    fear.inc.fear == 0 & fear.inc.high == 0), income.range))
plot_df <- as.data.frame(rbind(plot_df1, plot_df2, plot_df3, plot_df4))

plot.varyfear.low <- ggplot(subset(plot_df, fear.inc.high == 0 )) +
  geom_line(aes(x=income.range,y=point_estimate,group=fear.inc.type,col=fear.inc.type)) +
  geom_ribbon(aes(x=income.range,ymin=point_estimate - 1.96*standard_errors,ymax=point_estimate + 
                    1.96*standard_errors, group=fear.inc.type,fill=fear.inc.type), alpha = 0.6) +
  xlab('Income') + ylab('Probability of Supporting Redistribution') + theme_classic() + 
  scale_color_grey(guide = F) + scale_fill_grey(labels = c("No Fear", "Fear")) + labs(fill = "", title = "Low Regional Inequality") + 
  xlim(-5,5) + ylim(0.5, 0.9) + theme(plot.title = element_text(hjust = 0.5))
#geom_text(aes(x = 10, y = 0.2, label = "Fear of crime =  0"))  + 
#geom_text(aes(x = 25, y = 0.64, label = "Fear of crime =  1"), col = 'Gray47') 
pdf("low_gini_fear.pdf")
plot.varyfear.low
dev.off()


plot.varyfear.high <- ggplot(subset(plot_df, fear.inc.high == 1 )) +
  geom_line(aes(x=income.range,y=point_estimate,group=fear.inc.type,col=fear.inc.type), show.legend = "T") +
  geom_ribbon(aes(x=income.range,ymin=point_estimate - 1.96*standard_errors,ymax=point_estimate + 
                    1.96*standard_errors, group=fear.inc.type,fill=fear.inc.type), alpha = 0.6) +
  xlab('Income') + ylab('Probability of Supporting Redistribution') + theme_classic() + xlim(-5,5) + ylim(0.5, 0.9) +
  scale_color_grey(guide = F) + scale_fill_grey(labels = c("No Fear", "Fear")) + 
  labs(fill = "", title = "High Regional Inequality") + theme(plot.title = element_text(hjust = 0.5))
#geom_text(aes(x = 20, y = 0.23, label = "Fear of crime =  0"))  + 
#geom_text(aes(x = 25, y = 0.64, label = "Fear of crime =  1"), col = 'Gray47') 
pdf("high_gini_fear.pdf")
plot.varyfear.high
dev.off()

# correct se's and pi's for this model 
fear.inc.tab <- lapply(1:5, FUN = function(dat){
  outcome.fit <- glm(rincd_d ~ fear_d*incdist + Rgini*incdist + fear_d*Rgini + age + female + eduyrs + transue + 
                       transnlf + hhmmb + lowsup + smempl + whcol + blcol + noclass + 
                       skillshg + skillssp + Rforeign + Ruerate + Rtech + Rgdp,
                     family = binomial(link="probit"),
                     data = na.omit(rueda[rueda$`_mi_m` == dat,]))
  df <- data.frame(do.call(cbind, list(outcome.fit$coefficients, summary(outcome.fit)$coefficients[, 2])))
  names(df) <- c('coef', 'se')                
  return(df)
})

outcome.fit <- glm(rincd_d ~ fear_d*incdist + Rgini*incdist + fear_d*Rgini + age + female + eduyrs + transue + 
                     transnlf + hhmmb + lowsup + smempl + whcol + blcol + noclass + 
                     skillshg + skillssp + Rforeign + Ruerate + Rtech + Rgdp,
                   family = binomial(link="probit"),
                   data = na.omit(rueda[rueda$`_mi_m` == 2,]))

#apply functions

coefs_f_inc <- round(point_fn(fear.inc.tab),3)
ses_f_inc <- round(se_fn(fear.inc.tab), 3)
names(coefs_f_inc) <- names(coef(outcome.fit))
names(ses_f_inc) <- names(coef(outcome.fit))
stargazer(outcome.fit, type = "latex", title = "Mediation Model: Probit Regression Results", coef = list(coefs_f_inc), se = list(ses_f_inc))


############################
#### MEDIATION ANALYSIS ####
############################

mediator.fit <- glm(fear_d ~ Rgini + incdist + age + female + eduyrs + transue + transnlf + 
                      hhmmb +   lowsup + smempl + whcol + blcol + noclass + victim  + 
                      Rforeign + Rgdp, family = binomial(link="probit"),
                    data = na.omit(rueda[rueda$`_mi_m` == 2,]))


outcome.fit <- glm(rincd_d ~ fear_d*incdist + Rgini*incdist + fear_d*Rgini + age + female + eduyrs + transue + 
                     transnlf + hhmmb + lowsup + smempl + whcol + blcol + noclass + 
                     skillshg + skillssp + Rforeign + Ruerate + Rtech + Rgdp,
                   family = binomial(link="probit"),
                   data = na.omit(rueda[rueda$`_mi_m` == 2,]))


mediation <- mediate(mediator.fit, outcome.fit, treat = "Rgini", mediator = "fear_d", boot = TRUE, control.value = 0.292, 
                     treat.value = 0.381, 
                     #covariates = list(incdist = 3.05)
                     sims = 100)
summary(mediation)

save.test <- test.modmed(mediation, covariates.1 = list(incdist = 1.12), covariates.2 = list(incdist = -1.69), sims = 20)

pdf("mediation_results.pdf")
plot(mediation)
dev.off()

#draw 10000 coefficients
sim.beta <- rmvnorm(10^5,
                    mean = coef(outcome.fit),
                    sigma = vcov(outcome.fit))

#simulate predicted probabilities with other values at median 
sim.vec.high <- apply(model.matrix(outcome.fit), MARGIN = 2, median)
sim.vec.low <- sim.vec.high
sim.vec.high['Rgini'] <- 0.38
sim.vec.low['Rgini'] <- 0.29
sim.vec.high.f <- sim.vec.high
sim.vec.high.f['fear_d'] <- 1
sim.vec.low.f <- sim.vec.low #replaced sim.vec.high 
sim.vec.low.f['fear_d'] <- 1

#loop over values of income: income.range

fear <- lapply(income.range, FUN = function(i){
  sim.vec.high['incdist'] <- i
  sim.vec.high['incdist:Rgini'] <- 0.38*i
  sim.vec.low['incdist'] <- i
  sim.vec.low['incdist:Rgini'] <- 0.29*i
  sim.vec.high.f['incdist'] <- i
  sim.vec.high.f['incdist:Rgini'] <- 0.38*i
  sim.vec.high.f['fear_d:incdist'] <- i
  sim.vec.low.f['incdist'] <- i
  sim.vec.low.f['incdist:Rgini'] <- 0.29*i
  sim.vec.low.f['fear_d:incdist'] <- i
  pr.high <- pnorm(sim.beta%*%sim.vec.high)
  pr.low <- pnorm(sim.beta%*%sim.vec.low)
  pr.high.f <- pnorm(sim.beta%*%sim.vec.high.f)
  pr.low.f <- pnorm(sim.beta%*%sim.vec.low.f)
  o <- sapply(list(pr.high, pr.low, pr.high.f, pr.low.f),
              FUN=function(j){c(mean(j), quantile(j, c(0.025, 0.5, 0.975)))})
  o <- data.frame(t(o))
  names(o) <- c('mean', 'p025','median', 'p975')
  o$type <- c('High gini', 'Low gini', 'High gini (fear)', 'Low gini (fear)')
  o$fear <- c(0, 0, 1, 1)
  o$income <- i
  return(o) 
})

fear.df <- as.data.frame(do.call(rbind, fear))

pred.plot.f <- ggplot(subset(fear.df, fear == 1)) +
  geom_line(aes(x=income,y=mean,group=type,col=type)) +
  geom_ribbon(aes(x=income,ymin=p025,ymax=p975, group=type,fill=type), alpha = 0.25) +
  xlab('Income') + ylab('Probability of Supporting Redistribution') + theme_bw() + scale_color_grey(guide = F) +
  scale_fill_grey(labels = c("High Gini", "Low Gini")) + labs(fill = "Inequality", title = "Fear") + 
  xlim(-5,5) + ylim(0.5, 0.9) + theme(plot.title = element_text(hjust = 0.5))
pdf("subset_fear.pdf")
pred.plot.f
dev.off()

pred.plot <- ggplot(subset(fear.df, fear == 0)) +
  geom_line(aes(x=income,y=mean,group=type,col=type)) +
  geom_ribbon(aes(x=income,ymin=p025,ymax=p975, group=type,fill=type), alpha = 0.25) +
  xlab('Income') + ylab('Probability of Supporting Redistribution') + theme_bw() + scale_color_grey(guide = F) +
  scale_fill_grey(labels = c("High Gini", "Low Gini")) + labs(fill = "Inequality", title = "No Fear") + 
  xlim(-5,5) + ylim(0.5, 0.9) + theme(plot.title = element_text(hjust = 0.5))
pdf("subset_nofear.pdf")
pred.plot
dev.off()


gini_less <- glm(rincd_d ~ Rforeign*incdist + fear_d + imueclt  + age + female + eduyrs + transue + 
                   transnlf + hhmmb + lowsup + smempl + whcol + blcol + noclass + fear_d +
                   skillshg + skillssp + Rforeign + Ruerate + Rtech + Rgdp,
                 family = binomial(link="probit"),
                 data = na.omit(rueda[rueda$`_mi_m` == 2,]))

gini <- glm(rincd_d ~ Rforeign*incdist + Rgini + fear_d + imueclt  + age + female + eduyrs + transue + 
              transnlf + hhmmb + lowsup + smempl + whcol + blcol + noclass + fear_d +
              skillshg + skillssp + Rforeign + Ruerate + Rtech + Rgdp,
            family = binomial(link="probit"),
            data = na.omit(rueda[rueda$`_mi_m` == 2,]))

stargazer(gini_less, gini)

sim.beta <- rmvnorm(10^5,
                    mean = coef(gini_less),
                    sigma = vcov(gini_less))

no_gini_high <- apply(model.matrix(gini_less), MARGIN = 2, median)
no_gini_low <- apply(model.matrix(gini_less), MARGIN = 2, median)

inc.low <- -1.89
inc.high <- 1.63
no_gini_high['incdist'] <- inc.high
no_gini_low['incdist'] <- inc.low


#loop over Rforeignrange
foreign.range <- seq(min(rueda$Rforeign), max(rueda$Rforeign), 0.01) 
foriegn.pr <- lapply(foreign.range, FUN=function(i){
  no_gini_high['Rforeign'] <- i
  no_gini_low['Rforeign'] <- i
  no_gini_high['Rforeign:incdist'] <- i*inc.high
  no_gini_low['Rforeign:incdist'] <- i*inc.low
  pr.high <- pnorm(sim.beta %*% no_gini_high)
  pr.low <- pnorm(sim.beta %*% no_gini_low)
  o <- sapply(list(pr.high, pr.low),
              FUN=function(j){c(mean(j), sd(j), quantile(j, c(0.025, 0.5, 0.975)))})
  o <- data.frame(t(o))
  names(o) <- c('mean', 'se', 'p025','median', 'p975')
  o$high <- c('High', 'Low')
  o$Rforeign <- i
  return(o)
})

plot_df1 <- data.frame(cbind(subset(as.data.frame(do.call(rbind, foriegn.pr)),
                                    high == 'High'), foreign.range))
plot_df2 <- data.frame(cbind(subset(as.data.frame(do.call(rbind, foriegn.pr)),
                                    high == 'Low'), foreign.range))

foreign.df <- as.data.frame(rbind(plot_df1, plot_df2))

gini_less_plot <- ggplot(foreign.df) +
  geom_line(aes(x=Rforeign,y=mean, group = factor(high), col = factor(high))) +
  geom_ribbon(aes(x=Rforeign,ymin=p025,ymax=p975,  group = factor(high), fill = factor(high)), alpha = 0.25) +
  xlab('Rforeign') + ylab('Probability of supporting redistribution') + theme_bw() + 
  scale_color_grey() + scale_fill_grey() +  expand_limits(y=c(0.6, 0.75)) + labs(fill = 'High income') + 
  labs(fill = 'Income', col = 'Income') + ggtitle('Excluding Rgini as control (as in original paper)')

########################################################################################################################
## First differences plot for Rforeign controlling for Rgini
########################################################################################################################

gini_table <- lapply(1:5, FUN = function(dat){
  pref.fit <- glm(rincd_d ~ Rforeign*incdist + Rgini + fear_d + imueclt  + age + female + eduyrs + transue + 
                    transnlf + hhmmb + lowsup + smempl + whcol + blcol + noclass + fear_d +
                    skillshg + skillssp + Rforeign + Ruerate + Rtech + Rgdp,
      family = binomial(link="probit"),
      data = na.omit(rueda[rueda$`_mi_m` == dat,]))
  df <- data.frame(do.call(cbind, list(pref.fit$coefficients, summary(pref.fit)$coefficients[, 2])))
  names(df) <- c('coef', 'se')                
  return(df)
})

less_table <- lapply(1:5, FUN = function(dat){
  pref.fit <- glm(rincd_d ~ Rforeign*incdist + fear_d + imueclt  + age + female + eduyrs + transue + 
                    transnlf + hhmmb + lowsup + smempl + whcol + blcol + noclass + fear_d +
                    skillshg + skillssp + Rforeign + Ruerate + Rtech + Rgdp,
                  family = binomial(link="probit"),
                  data = na.omit(rueda[rueda$`_mi_m` == dat,]))
  df <- data.frame(do.call(cbind, list(pref.fit$coefficients, summary(pref.fit)$coefficients[, 2])))
  names(df) <- c('coef', 'se')                
  return(df)
})

gini_coefs <- point_fn(gini_table)
gini_ses <- se_fn(gini_table)
names(gini_coefs) <- row.names(gini_table[[1]])
names(gini_ses) <- row.names(gini_table[[1]])

less_coefs <- point_fn(less_table)
less_ses <- se_fn(less_table)
names(less_coefs) <- row.names(less_table[[1]])
names(less_ses) <- row.names(less_table[[1]])


# now with Gini included

sim.beta <- rmvnorm(10^5,
                    mean = gini_coefs,
                    sigma = vcov(gini))
sim.beta.less <- rmvnorm(10^5,
                    mean = coef(gini_less),
                    sigma = vcov(gini_less))

gini_high_foreign_high <- apply(model.matrix(gini), MARGIN = 2, median)
gini_high_foreign_low <- apply(model.matrix(gini), MARGIN = 2, median)
gini_low_foreign_high <- apply(model.matrix(gini), MARGIN = 2, median)
gini_low_foreign_low <- apply(model.matrix(gini), MARGIN = 2, median)

less_high_foreign_high <- apply(model.matrix(gini_less), MARGIN = 2, median)
less_high_foreign_low <- apply(model.matrix(gini_less), MARGIN = 2, median)
less_low_foreign_high <- apply(model.matrix(gini_less), MARGIN = 2, median)
less_low_foreign_low <- apply(model.matrix(gini_less), MARGIN = 2, median)


inc.low <- -1.89
inc.high <- 1.63
rforeign.low <- quantile(rueda$Rforeign, 0.2)
rforeign.high <- quantile(rueda$Rforeign, 0.8)

gini_high_foreign_high['incdist'] <- inc.high
gini_high_foreign_high['Rforeign'] <- rforeign.high
gini_high_foreign_low['incdist'] <- inc.high
gini_high_foreign_low['Rforeign'] <- rforeign.low
gini_low_foreign_high['incdist'] <- inc.low
gini_low_foreign_high['Rforeign'] <- rforeign.high
gini_low_foreign_low['incdist'] <- inc.low
gini_low_foreign_low['Rforeign'] <- rforeign.low

less_high_foreign_high['incdist'] <- inc.high
less_high_foreign_high['Rforeign'] <- rforeign.high
less_high_foreign_low['incdist'] <- inc.high
less_high_foreign_low['Rforeign'] <- rforeign.low
less_low_foreign_high['incdist'] <- inc.low
less_low_foreign_high['Rforeign'] <- rforeign.high
less_low_foreign_low['incdist'] <- inc.low
less_low_foreign_low['Rforeign'] <- rforeign.low

gini_high_foreign_high['Rforeign:incdist'] <- inc.high * rforeign.high
gini_high_foreign_low['Rforeign:incdist'] <- inc.high * rforeign.low
gini_low_foreign_high['Rforeign:incdist'] <- inc.low * rforeign.high
gini_low_foreign_low['Rforeign:incdist'] <- inc.low * rforeign.low

less_high_foreign_high['Rforeign:incdist'] <- inc.high * rforeign.high
less_high_foreign_low['Rforeign:incdist'] <- inc.high * rforeign.low
less_low_foreign_high['Rforeign:incdist'] <- inc.low * rforeign.high
less_low_foreign_low['Rforeign:incdist'] <- inc.low * rforeign.low


gini.pr.high.high <- pnorm(sim.beta %*% gini_high_foreign_high)
gini.pr.high.low <- pnorm(sim.beta %*% gini_high_foreign_low)
gini.pr.low.high <- pnorm(sim.beta %*% gini_low_foreign_high)
gini.pr.low.low <- pnorm(sim.beta %*% gini_low_foreign_low)

less.pr.high.high <- pnorm(sim.beta.less %*% less_high_foreign_high)
less.pr.high.low <- pnorm(sim.beta.less %*% less_high_foreign_low)
less.pr.low.high <- pnorm(sim.beta.less %*% less_low_foreign_high)
less.pr.low.low <- pnorm(sim.beta.less %*% less_low_foreign_low)


gini.high.diff <- gini.pr.high.high - gini.pr.high.low
gini.low.diff <- gini.pr.low.high - gini.pr.low.low
less.high.diff <- less.pr.high.high - less.pr.high.low
less.low.diff <- less.pr.low.high - less.pr.low.low


o <- sapply(list(gini.high.diff, gini.low.diff, less.high.diff, less.low.diff), 
            FUN = function(j){c(mean(j), sd(j), quantile(j, c(0.025, 0.5, 0.975)))})
o <- data.frame(t(o))
names(o) <- c('mean','se','p025','median','p975')
o$inc <- c(1,0,1,0)
o$gini <- c('Yes','Yes','No','No')

o$inc_gini <- paste0(as.character(o$inc), o$gini)
p <- ggplot() + geom_point(aes(x = mean, y = inc_gini, colour = gini), data = o, size= 4) + 
  geom_errorbarh(aes(x = mean, xmin = p025, xmax = p975, y = inc_gini, colour = gini), height = 0, size = 1, data = o) + 
  theme_classic() + geom_vline(aes(xintercept = 0), linetype = 'dashed', color = 'grey30') + scale_color_grey() + 
  labs(x = 'Difference in Pred. Probability', y = 'Income', color = 'Controls for RGini') + 
  scale_y_discrete(labels = c('Low','Low', 'High', 'High'))
p
ggsave(p, width = 9, height = 7, file = 'first_diff_Rgini.png')


